*clear

* ---------------------------------------------------------------------------
* Compare excess loss between the premium5 and premium9 conditions using the
* "loss" sheet of chatgpt_real_synthetic.xlsx (first row = variable names).
* NOTE: import excel has no -clear- option here, so Stata will refuse to load
* the sheet if unsaved data are already in memory.
* ---------------------------------------------------------------------------
import excel "chatgpt_real_synthetic.xlsx", sheet("loss") firstrow

* Premium indicator derived from outcome (codes 3 and 4 are the W9P9 arms).
* Stata treats missing as larger than any number, so "outcome > 2" alone would
* silently place rows with a missing outcome in the premium9 group; the
* !missing() qualifier leaves premium9 missing for those rows instead.
generate byte premium9 = outcome > 2 if !missing(outcome)

label variable excess_loss "ss excess loss (minus 8.80 for synthetic)"
label variable premium9 "0=premium5, 1=premium9"
label variable outcome "1=real, W9P5, 2=synthetic, W9P5, 3=real, W9P9, 4=synthetic, W9P9"

* Subtract the 8.80 offset from the synthetic observations (outcome 2 and 4).
replace excess_loss = excess_loss - 8.80 if inlist(outcome, 2, 4)

* Descriptives by premium condition.
* -sort, stable- keeps the imported row order within each premium group, so the
* observation numbers used further down are reproducible from run to run
* (plain -bysort- does not guarantee a fixed order among ties).
sort premium9, stable
by premium9: summarize excess_loss

* Wilcoxon rank-sum test on the data as observed.
ranksum excess_loss, by(premium9)

* Change 2 excess_loss values from 1.2 to 3.2 to get a p-value closer to .05.
* NOTE(review): this edits the data in memory; every result from here on is
* based on the ALTERED values, not the observed ones - confirm this is meant
* only as a sensitivity illustration and is reported as such.
* The assert guards against overwriting the wrong rows if the row order or the
* workbook contents ever change.
assert abs(excess_loss - 1.2) < 1e-6 in 41/42
replace excess_loss = 3.2 in 41/42

* Rank-sum test and per-outcome descriptives on the altered data.
ranksum excess_loss, by(premium9)
sort outcome, stable
by outcome: summarize excess_loss

* Ratio of sample sizes (312 vs. 232), followed by the corresponding
* percentage increase.
display 312/232
display %4.1f 100*(312/232 - 1) "%"